package com.xc.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;

/**
 * Helpers for downloading a web page and printing a cleaned-up version of its
 * markup (script blocks and blank lines removed) to standard output.
 */
public class Utils {
	/** Page fetched by the no-argument {@link #GetHtmlToUrl()}. */
	private static final String DEFAULT_URL = "http://www.baidu.com";

	/** Connect/read timeout so an unresponsive host cannot block the caller forever. */
	private static final int TIMEOUT_MILLIS = 10000;

	/**
	 * Matches one {@code <script ...>...</script>} block, possibly spanning lines.
	 * DOTALL with a reluctant {@code .+?} is matched iteratively; the former
	 * {@code (.|\n)+?} form recursed once per character and could overflow the
	 * stack on large script bodies.
	 */
	private static final Pattern SCRIPT_BLOCK =
			Pattern.compile("<script.+?</script>", Pattern.DOTALL);

	/**
	 * Downloads the default page and prints its markup, with script blocks and
	 * blank lines removed, to {@code System.out}.
	 */
	public void GetHtmlToUrl() {
		GetHtmlToUrl(DEFAULT_URL);
	}

	/**
	 * Downloads {@code address} and prints its markup, with script blocks and
	 * blank lines removed, to {@code System.out}.
	 *
	 * <p>This is best-effort: an I/O failure (malformed URL, connection error,
	 * timeout, ...) is reported on {@code System.err} and not rethrown.
	 *
	 * @param address URL of the page to download
	 */
	public void GetHtmlToUrl(String address) {
		try {
			URLConnection conn = new URL(address).openConnection();
			conn.setConnectTimeout(TIMEOUT_MILLIS);
			conn.setReadTimeout(TIMEOUT_MILLIS);

			StringBuilder raw = new StringBuilder();
			// try-with-resources closes the stream even when reading fails midway.
			try (InputStream in = conn.getInputStream();
					BufferedReader reader = new BufferedReader(
							new InputStreamReader(in, charsetFromContentType(conn.getContentType())))) {
				String line;
				while ((line = reader.readLine()) != null) {
					raw.append(line).append('\n');
				}
			}
			System.out.println(stripScriptsAndBlankLines(raw.toString()));
		} catch (IOException e) {
			// Deliberately not rethrown: callers of this method never had to handle failures.
			System.err.println("Failed to fetch " + address);
			e.printStackTrace();
		}
	}

	/**
	 * Removes every script block from {@code html} and drops lines that are
	 * empty or contain only whitespace.
	 *
	 * @param html markup using {@code \n} as line separator; must not be null
	 * @return the remaining lines, each terminated by {@code \n}
	 */
	static String stripScriptsAndBlankLines(String html) {
		String withoutScripts = SCRIPT_BLOCK.matcher(html).replaceAll("");
		StringBuilder cleaned = new StringBuilder(withoutScripts.length());
		for (String line : withoutScripts.split("\n")) {
			if (!line.trim().isEmpty()) {
				cleaned.append(line).append('\n');
			}
		}
		return cleaned.toString();
	}

	/**
	 * Extracts the charset from a Content-Type value such as
	 * {@code text/html; charset=gbk}.
	 *
	 * @param contentType the Content-Type value, may be null
	 * @return the declared charset, or UTF-8 when none is declared or the
	 *         declared name is illegal or unsupported on this JVM
	 */
	static Charset charsetFromContentType(String contentType) {
		final String key = "charset=";
		if (contentType != null) {
			for (String part : contentType.split(";")) {
				String param = part.trim();
				if (!param.regionMatches(true, 0, key, 0, key.length())) {
					continue;
				}
				String name = param.substring(key.length()).trim();
				// The value may be a quoted string, e.g. charset="utf-8".
				if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
					name = name.substring(1, name.length() - 1);
				}
				try {
					return Charset.forName(name);
				} catch (IllegalArgumentException ignored) {
					// Illegal or unsupported charset name: fall back to the default below.
					break;
				}
			}
		}
		return StandardCharsets.UTF_8;
	}
}
